# Replication for:
# SHARPENED PENCILS: NEW PARTIES AND OLD ELITES AFTER DEMOCRATIZATION
# Tyler Brown, University of Konstanz, Dept. of Politics and Public Administration

# loading packages
library(tidyverse)
library(patchwork)
library(fixest)
library(stargazer)
library(car)
library(latex2exp)

# loading final analysis file
cands_covs <- read_csv('5_replication/brown_analysis_sharpenedpencils_submit.csv')


# selecting on necessary variables
df <- cands_covs %>% select(
  teryt,
  elect_year,
  main_party, 
  all_of(starts_with('party')), # all party-level results
  all_of(starts_with('elect')), # all election-level results
  pop_tot,
  pop_old,
  all_of(starts_with('ent')),
  net_migration,
  WOJ, 
  woj_1975_real,
  comm_office, 
  all_of(starts_with('p.'))
)


# setting design parameters
tex_val <- T
dig_num <- 3
sig_codes <- c('***' = 0.001, '**' = 0.01, '*' = 0.05, '.' = 0.10)
var_labs <- c(
  "p.ipn_sb" = "SB",
  "p.ipn_pzpr" = "PZPR",
  "I(party_num_cand/elect_num_cand)" = "Rel. list size",
  "I(party_num_incumbent/party_num_cand)" = "Prop. incumbents",
  "I(p.ipn_sb+p.ipn_pzpr)" = 'SB + PZPR',
  'party_camp_fe' = 'Party Camp',
  'elect_year' = 'year',
  'woj_1975_real' = 'Voivodeship',
  'clr_vote' = 'Vote Share (CLR transformed)'
)


# making model data
model_data <- df %>% 
  arrange(teryt, elect_year) %>% 
  group_by(teryt) %>% 
  fill(pop_tot, .direction = 'up') %>%  # imputing 1994 population with 1998 population in case where pop is missing (534 elections)
  ungroup %>% 
  select(party_prop_votes, main_party, pop_tot, elect_num_cand, party_num_incumbent,party_num_cand, party, p.ipn_sb, p.ipn_pzpr, teryt, elect_year, party_camp_fe, woj_1975_real) %>% 
  filter(complete.cases(.))


# CLR transform
model_data <- model_data %>% 
  group_by(teryt, elect_year) %>% 
  mutate(geom_mean = prod(party_prop_votes + (10^-6))) %>% 
  mutate(geom_mean = geom_mean^(1/n())) %>% 
  mutate(clr_vote = log((party_prop_votes + (10^-6))/geom_mean)) 

# compressed beta transform
model_data <- model_data %>% 
  ungroup %>% 
  mutate(beta_votes =  (party_prop_votes * (n() - 1) + 0.5) / n())


# H1: Combined effects

m1_votescomb <- model_data %>% 
  feols(
    clr_vote ~ I(p.ipn_sb + p.ipn_pzpr) | elect_year + woj_1975_real^party_camp_fe,
    cluster = ~party_camp_fe^woj_1975_real
  ) 

m2_votescomb <- model_data %>% 
  feols(
    clr_vote ~ I(p.ipn_sb + p.ipn_pzpr) + I(party_num_cand/elect_num_cand)| elect_year + woj_1975_real^party_camp_fe,
    cluster = ~party_camp_fe^woj_1975_real
  ) 


m3_votescomb <- model_data %>% 
  feols(
  clr_vote ~ I(p.ipn_sb + p.ipn_pzpr) + I(party_num_incumbent/party_num_cand) + I(party_num_cand/elect_num_cand) | elect_year + woj_1975_real^party_camp_fe,
  cluster = ~party_camp_fe^woj_1975_real
) 


# TABLE 4: OLS estimates for H1
models_clr_comb <- mget(paste0('m', 1:3, '_votescomb'))
etable(models_clr_comb, tex = tex_val, signif.code = sig_codes, dict = var_labs,
       digits = dig_num)


# H2 mechanism effects
m1_votes <- model_data %>% 
  feols(clr_vote ~ (p.ipn_sb) | elect_year + woj_1975_real^party_camp_fe, cluster = ~party_camp_fe^woj_1975_real) 

m2_votes <- model_data %>% 
  feols(clr_vote ~ (p.ipn_pzpr) | elect_year + woj_1975_real^party_camp_fe, cluster = ~party_camp_fe^woj_1975_real) 

m3_votes <- model_data %>% 
  feols(clr_vote ~ (p.ipn_sb + p.ipn_pzpr) | elect_year + woj_1975_real^party_camp_fe, cluster = ~party_camp_fe^woj_1975_real) 


m4_votes <- model_data %>% 
  feols(clr_vote ~ (p.ipn_sb + p.ipn_pzpr) + I(party_num_cand/elect_num_cand) | elect_year + woj_1975_real^party_camp_fe, cluster = ~party_camp_fe^woj_1975_real) 


m5_votes <- model_data %>% 
  feols(clr_vote ~ (p.ipn_sb + p.ipn_pzpr) + I(party_num_incumbent/party_num_cand) + I(party_num_cand/elect_num_cand) | elect_year + woj_1975_real^party_camp_fe,cluster = ~party_camp_fe^woj_1975_real) 


# TABLE 5
models_clr <- mget(paste0('m', 1:5, '_votes'))
etable(models_clr, tex = tex_val, signif.code = sig_codes, dict = var_labs,
       digits = dig_num)

# standardized effect size from Model 3:
t_pzpr <- summary(m3_votes)$coeftable[, "t value"]['p.ipn_pzpr']
degfreedom <- m3_votes$nobs - m3_votes$nparams
partial_r <- sqrt(t_pzpr^2 / (t_pzpr^2 + degfreedom))

partial_r

# fisher transformation
z_r <- 0.5*log((1+partial_r)/(1-partial_r))


se_z_r <- 1/sqrt(dim(model_data)[1] - 3)

# test statistic and 95 percent C.I.
z_r
z_r - 1.96*se_z_r 
z_r + 1.96*se_z_r




## FIGURES

# Figure 1

df %>% 
  group_by(party, elect_year) %>% 
  filter(main_party == 1) %>% 
  summarise(p = sum(party_num_ipn_list, na.rm = T)/sum(party_num_cand, na.rm = T),
            PZPR = sum(party_num_ipn_list_party, na.rm = T)/sum(party_num_cand, na.rm = T),
            SB = sum(party_num_ipn_list_bezpieka, na.rm = T)/sum(party_num_cand, na.rm = T),
            n = n()) %>% 
  filter(n > 100) %>%
  pivot_longer(!c(party, elect_year, n, p)) %>% 
  mutate(name = factor(name, levels = c('SB', 'PZPR'))) %>% 
  ggplot(aes(x = reorder(paste0(party, ' ', elect_year), p), y = value))+
  geom_bar(stat = 'identity', color = 'black', width = 0.7, aes(fill = name), linewidth = 0) +
  labs(x = 'electoral committee, year', y = 'proportion of insiders', title = 'Former SB/PZPR candidates by party (1994—2018)', fill = 'Organization')+
  theme_minimal()+
  scale_fill_manual(values = c('forestgreen','lightblue'))+
  theme(legend.position = 'right',
        axis.text.x = element_text(face = 'bold', color = 'black'),
        axis.text.y = element_text(face = 'bold', color = 'black'),
        legend.text = element_text(face = 'bold', color = 'black'),
        legend.title.position = 'top')+
  scale_y_continuous(expand = c(-2,0.08))+
  coord_flip()
ggsave('4_plots/desc_sb_pzpr_props.pdf')



ipn_plot_data <- cands_covs %>%
  select(party_camp, party_camp_fe, elect_year, p.ipn_sb, p.ipn_pzpr) %>%
  mutate(mix = (p.ipn_pzpr/(p.ipn_sb + p.ipn_pzpr))) %>% 
  pivot_longer(cols = c(p.ipn_sb, p.ipn_pzpr, mix)) %>%
  filter(!is.na(value)) %>% 
  mutate(party_camp = ifelse(is.na(party_camp) | party_camp == 'newright', 'z_all_other', party_camp))


ipn_plot_data %>% 
  mutate(asp = ifelse(party_camp == 'socsuccessor', 'ASP', 'non-regime')) %>% 
  mutate(asp = ifelse(is.na(asp), 'non-regime', asp)) %>%
  group_by(elect_year, name) %>% 
  do(broom::tidy(leveneTest(value ~ asp, ., center = median))) %>%  # brown forszthe test for difference in variances to see if ANOVA is approp --> NO.
  ungroup() %>%
  mutate(sig = case_when(
    p.value < sig_codes[1] ~ names(sig_codes)[1],
    p.value < sig_codes[2]  ~ names(sig_codes)[2],
    p.value < sig_codes[3]  ~ names(sig_codes)[3],
    p.value < sig_codes[4]  ~ names(sig_codes)[4],
    TRUE            ~ "n.s."
  ))

# variances are not equal, distributions not normal -- need to do Wilcoxon test for difference in means
ipn_wilcox <- ipn_plot_data %>% 
  mutate(asp = ifelse(party_camp == 'socsuccessor', 'ASP', 'non-regime')) %>% 
  mutate(asp = ifelse(is.na(asp), 'non-regime', asp)) %>%
  group_by(elect_year, name) %>% 
  do(broom::tidy(wilcox.test(value ~ asp, data = .))) %>% 
  ungroup() %>%
  mutate(sig = case_when(
    p.value < sig_codes[1] ~ names(sig_codes)[1],
    p.value < sig_codes[2]  ~ names(sig_codes)[2],
    p.value < sig_codes[3]  ~ names(sig_codes)[3],
    p.value < sig_codes[4]  ~ names(sig_codes)[4],
    TRUE            ~ "n.s."
  ))

ipn_props <- ipn_plot_data %>% 
  mutate(asp = ifelse(party_camp == 'socsuccessor', 'ASP', 'non-regime')) %>% 
  mutate(asp = ifelse(is.na(asp), 'non-regime', asp)) %>%
  filter(!name == 'mix') %>% 
  ggplot(aes(x = as.factor(elect_year), y = value, color = asp, shape = asp))+
  stat_summary_bin(size = 0.4, position = position_dodge(width = 0.2))+
  #stat_summary_bin(geom = 'errorbar', position = position_dodge(width = 0.2), width = 0.2)+
  facet_wrap(~name, ncol = 3, scales = 'free_y',
             labeller = as_labeller(
               c(`mix` = '(A) PZPR share of insiders', 
                 `p.ipn_pzpr` = '(B) PZPR share of candidates', 
                 `p.ipn_sb` = '(C) SB share of candidates')))+
  scale_color_manual(values = c('darkred','steelblue'))+
  scale_shape_manual(values = c(16, 17)) + # set point types here

  theme_bw()+
  theme(legend.position = 'bottom',legend.title.position = 'left',)+
  labs(shape = 'Party Family',x = '',y = 'proportion')+
  guides(shape = guide_legend(override.aes = list(color = c('darkred', 'steelblue'))), color = 'none') + 
  
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
  scale_y_continuous(expand = c(0.2,0))+
  geom_text(
    data = ipn_wilcox %>% filter(!name == 'mix'),
    aes(x = as.factor(elect_year), y = 0, label = sig),inherit.aes = FALSE,size = 5,vjust = 1,hjust = 0,color = "black"
  )


ipn_mix <- ipn_plot_data %>% 
  mutate(asp = ifelse(party_camp == 'socsuccessor', 'ASP', 'non-regime')) %>% 
  mutate(asp = ifelse(is.na(asp), 'non-regime', asp)) %>%
  filter(name == 'mix') %>% 
  ggplot(aes(x = as.factor(elect_year), y = value, color = asp, shape = asp))+
  stat_summary_bin(size = 0.4, position = position_dodge(width = 0.2))+
  #stat_summary_bin(geom = 'errorbar', position = position_dodge(width = 0.2), width = 0.2)+
  facet_wrap(~name, ncol = 3, scales = 'free_y',
             labeller = as_labeller(
               c(`mix` = '(A) PZPR share of insiders', 
                 `p.ipn_pzpr` = '(B) PZPR share of candidates', 
                 `p.ipn_sb` = '(C) SB share of candidates')))+
  scale_color_manual(values = c('darkred','steelblue'))+
  scale_shape_manual(values = c(16, 17)) + # set point types here
  theme_bw()+
  theme(legend.position = 'bottom',legend.title.position = 'left',)+
  labs(color = 'Party Family',x = '',y = 'proportion')+
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1), legend.position ='none')+
  guides(shape = guide_legend(override.aes = list(color = c('darkred', 'steelblue')))) + 
  scale_y_continuous(expand = c(0.2,0))+
  geom_text(
    data = ipn_wilcox %>% filter(name == 'mix'),
    aes(x = as.factor(elect_year), y = 0, label = sig),inherit.aes = FALSE,size = 5,vjust = 1,hjust = 0,color = "black"
  )


# FIGURE 3
ipn_mix/ipn_props

ggsave('4_plots/ipn_asp_desc.pdf')


# APPENDIX MATERIALS
# descriptive statistics

cands_covs %>% 
  ggplot(aes(x = elect_year, y = party_prop_votes, color = party_camp))+
  stat_summary_bin()+
  stat_summary_bin(geom = 'line')+
  scale_color_manual(
    values = c(
      'goldenrod',
      'steelblue',
      'darkred',
      'darkgreen',
      'grey'
    ),
    labels = c(
      'New Right',
      'Post-Authoritarian Opposition',
      'Successor Parties',
      'Traditionalist',
      'All other parties'
    )
  )+
  geom_hline(yintercept = 0)+
  theme_bw()+
  theme(legend.position = 'bottom',
        legend.title.position = 'left',
  )+
  labs(color = 'Party Family',
       x = 'year',
       y = 'mean vote share',
       title = 'Mean vote share for party families over time')
ggsave('4_plots/mean_partyfam_voteshare.pdf')

cands_covs %>% 
  ggplot(aes(x = elect_year, y = (party_num_incumbent_reelected/party_num_incumbent), color = party_camp))+
  stat_summary_bin()+
  stat_summary_bin(geom = 'line')+
  scale_color_manual(
    values = c(
      'goldenrod',
      'steelblue',
      'darkred',
      'darkgreen',
      'grey'
    ),
    labels = c(
      'New Right',
      'Post-Authoritarian Opposition',
      'Successor Parties',
      'Traditionalist',
      'All other parties'
    )
  )+
  geom_hline(yintercept = 0)+
  theme_bw()+
  theme(legend.position = 'bottom',
        legend.title.position = 'left',
  )+
  labs(color = 'Party Family',
       x = 'year',
       y = 'proportion reelected',
       title = 'Incumbent reelection probability for party families over time')
ggsave('4_plots/mean_partyfam_incumbreelect.pdf')



## electoral institutions
m1_robsmall <- model_data %>% 
  filter(pop_tot <= 20000) %>% 
  feols(
    clr_vote ~ (p.ipn_sb + p.ipn_pzpr) | elect_year + woj_1975_real^party_camp_fe,
    cluster = ~party_camp_fe^woj_1975_real
  ) 

m2_robsmall <- model_data %>% 
  filter(pop_tot <= 20000) %>% 
  feols(
    clr_vote ~ (p.ipn_sb + p.ipn_pzpr) + I(party_num_cand/elect_num_cand) | elect_year + woj_1975_real^party_camp_fe,
    cluster = ~party_camp_fe^woj_1975_real
  ) 

m3_robsmall <- model_data %>% 
  filter(pop_tot <= 20000) %>% 
  feols(
    clr_vote ~ (p.ipn_sb + p.ipn_pzpr) + I(party_num_incumbent/party_num_cand) + I(party_num_cand/elect_num_cand)| elect_year + woj_1975_real^party_camp_fe,
    cluster = ~party_camp_fe^woj_1975_real
  ) 

m1_robbig <- model_data %>% 
  filter(pop_tot > 20000) %>% 
  feols(
    clr_vote ~ (p.ipn_sb + p.ipn_pzpr) | elect_year + woj_1975_real^party_camp_fe,
    cluster = ~party_camp_fe^woj_1975_real
  ) 

m2_robbig <- model_data %>% 
  filter(pop_tot > 20000) %>% 
  feols(
    clr_vote ~ (p.ipn_sb + p.ipn_pzpr) + I(party_num_cand/elect_num_cand)| elect_year + woj_1975_real^party_camp_fe,
    cluster = ~party_camp_fe^woj_1975_real
  ) 

m3_robbig <- model_data %>% 
  filter(pop_tot > 20000) %>% 
  feols(
    clr_vote ~ (p.ipn_sb + p.ipn_pzpr) + I(party_num_incumbent/party_num_cand) + I(party_num_cand/elect_num_cand)| elect_year + woj_1975_real^party_camp_fe,
    cluster = ~party_camp_fe^woj_1975_real
  ) 


model_robsmall <- mget(paste0('m', 1:3, '_robsmall'))
etable(model_robsmall, tex = tex_val, signif.code = sig_codes,
       dict = var_labs,
       digits = dig_num)

model_robbig <- mget(paste0('m', 1:3, '_robbig'))
etable(model_robbig, tex = tex_val, signif.code = c('**' = 0.01, '*' = 0.05, '.' = 0.10),
       dict = var_labs,
       digits = dig_num)


# model alt party FE

m1_robasp <- model_data %>% 
  mutate(asp = ifelse(party == 'socsuccessor', 'ASP', 'not')) %>% 
  feols(
    clr_vote ~ (p.ipn_sb + p.ipn_pzpr) | elect_year + woj_1975_real^asp,
    cluster = ~asp^woj_1975_real
  ) 

m2_robasp <- model_data %>% 
  mutate(asp = ifelse(party == 'socsuccessor', 'ASP', 'not')) %>% 
  feols(
    clr_vote ~ (p.ipn_sb + p.ipn_pzpr) + I(party_num_cand/elect_num_cand) | elect_year + woj_1975_real^asp,
    cluster = ~asp^woj_1975_real
  ) 

m3_robasp <- model_data %>% 
  mutate(asp = ifelse(party == 'socsuccessor', 'ASP', 'not')) %>% 
  feols(
    clr_vote ~ (p.ipn_sb + p.ipn_pzpr) + I(party_num_incumbent/party_num_cand) + I(party_num_cand/elect_num_cand)| elect_year + woj_1975_real^asp,
    cluster = ~asp^woj_1975_real
  ) 

model_robasp <- mget(paste0('m', 1:3, '_robasp'))
etable(model_robasp, tex = tex_val, signif.code = sig_codes,
       dict = var_labs,
       digits = dig_num)


# APPENDIX

# beta regression
m1_votescomb_beta <- feglm(
  beta_votes ~ I(p.ipn_sb + p.ipn_pzpr) | elect_year + woj_1975_real^party_camp_fe,
  data = model_data,
  family = quasibinomial("logit"),
  cluster = ~party_camp_fe^woj_1975_real
)

m2_votescomb_beta <- feglm(
  beta_votes ~ I(p.ipn_sb + p.ipn_pzpr) + I(party_num_cand / elect_num_cand) | elect_year + woj_1975_real^party_camp_fe,
  data = model_data,
  family = quasibinomial("logit"),
  cluster = ~party_camp_fe^woj_1975_real
)

m3_votescomb_beta <- feglm(
  beta_votes ~ I(p.ipn_sb + p.ipn_pzpr) + I(party_num_cand / elect_num_cand) + I(party_num_incumbent/party_num_cand) | elect_year + woj_1975_real^party_camp_fe,
  data = model_data,
  family = quasibinomial("logit"),
  cluster = ~party_camp_fe^woj_1975_real
)


models_beta <- mget(paste0('m', 1:3, '_votescomb_beta'))
etable(models_beta, digits = dig_num, tex = tex_val, signif.code = sig_codes,
       dict = var_labs)


m1_votes_beta <- feglm(
  beta_votes ~ (p.ipn_pzpr) | elect_year + woj_1975_real^party_camp_fe,
  data = model_data,
  family = quasibinomial("logit"),
  cluster = ~party_camp_fe^woj_1975_real
)

m2_votes_beta <- feglm(
  beta_votes ~ (p.ipn_sb) | elect_year + woj_1975_real^party_camp_fe,
  data = model_data,
  family = quasibinomial("logit"),
  cluster = ~party_camp_fe^woj_1975_real
)

m3_votes_beta <- feglm(
  beta_votes ~ (p.ipn_sb + p.ipn_pzpr) | elect_year + woj_1975_real^party_camp_fe,
  data = model_data,
  family = quasibinomial("logit"),
  cluster = ~party_camp_fe^woj_1975_real
)

m4_votes_beta <- feglm(
  beta_votes ~ (p.ipn_sb + p.ipn_pzpr) + I(party_num_cand / elect_num_cand) | elect_year + woj_1975_real^party_camp_fe,
  data = model_data,
  family = quasibinomial("logit"),
  cluster = ~party_camp_fe^woj_1975_real
)

m5_votes_beta <- feglm(
  beta_votes ~ (p.ipn_sb + p.ipn_pzpr) + I(party_num_cand / elect_num_cand) + I(party_num_incumbent/party_num_cand) | elect_year + woj_1975_real^party_camp_fe,
  data = model_data,
  family = quasibinomial("logit"),
  cluster = ~party_camp_fe^woj_1975_real
)

plot(residuals(m3_votes_beta, type = "pearson") ~ fitted(m3_votes_beta))

m3_votes_beta$dispersion

models_beta <- mget(paste0('m', 1:5, '_votes_beta'))

for (m in models_beta){
  print(paste0('Model ', m$dispersion))
}

etable(models_beta, tex = tex_val, signif.code = sig_codes,
       dict = var_labs, 
       digits = dig_num)




# CLR with different SE clustering:


m1_votes_teryt <- model_data %>% 
  feols(clr_vote ~ (p.ipn_sb) | elect_year + teryt^party_camp_fe, cluster = ~party_camp_fe^teryt) 

m2_votes_teryt <- model_data %>% 
  feols(clr_vote ~ (p.ipn_pzpr) | elect_year + teryt^party_camp_fe,cluster = ~party_camp_fe^teryt) 

m3_votes_teryt <- model_data %>% 
  feols(clr_vote ~ (p.ipn_sb + p.ipn_pzpr) | elect_year + teryt^party_camp_fe,cluster = ~party_camp_fe^teryt) 


m4_votes_teryt <- model_data %>% 
  feols(clr_vote ~ (p.ipn_sb + p.ipn_pzpr) + I(party_num_cand/elect_num_cand) | elect_year + teryt^party_camp_fe,cluster =~party_camp_fe^teryt) 


m5_votes_teryt <- model_data %>% 
  feols(clr_vote ~ (p.ipn_sb + p.ipn_pzpr) + I(party_num_incumbent/party_num_cand) + I(party_num_cand/elect_num_cand) | elect_year + teryt^party_camp_fe, cluster = ~party_camp_fe^teryt) 


models_clr_teryt <- mget(paste0('m', 1:5, '_votes_teryt'))
etable(models_clr_teryt, tex = tex_val, signif.code = sig_codes, dict = var_labs,
       digits = dig_num)




m1_votes_teryt2 <- model_data %>% 
  feols(clr_vote ~ (p.ipn_sb) | elect_year^party_camp_fe, cluster = ~teryt) 

m2_votes_teryt2 <- model_data %>% 
  feols(clr_vote ~ (p.ipn_pzpr) | elect_year^party_camp_fe,cluster = ~teryt) 

m3_votes_teryt2 <- model_data %>% 
  feols(clr_vote ~ (p.ipn_sb + p.ipn_pzpr) | elect_year^party_camp_fe,cluster = ~teryt) 

m4_votes_teryt2 <- model_data %>% 
  feols(clr_vote ~ (p.ipn_sb + p.ipn_pzpr) + I(party_num_cand/elect_num_cand) | elect_year^party_camp_fe,cluster =~teryt) 


m5_votes_teryt2 <- model_data %>% 
  feols(clr_vote ~ (p.ipn_sb + p.ipn_pzpr) + I(party_num_incumbent/party_num_cand) + I(party_num_cand/elect_num_cand) | elect_year^party_camp_fe, cluster = ~teryt) 


models_clr_teryt2 <- mget(paste0('m', 1:5, '_votes_teryt2'))
etable(models_clr_teryt2, tex = tex_val, signif.code = sig_codes, dict = var_labs,
       digits = dig_num)



# raw vote shares

m1_votes_raw <- model_data %>% 
  feols(party_prop_votes ~ (p.ipn_sb) | elect_year + woj_1975_real^party_camp_fe, cluster = ~party_camp_fe^woj_1975_real) 

m2_votes_raw <- model_data %>% 
  feols(party_prop_votes ~ (p.ipn_pzpr) | elect_year + woj_1975_real^party_camp_fe, cluster = ~party_camp_fe^woj_1975_real) 

m3_votes_raw <- model_data %>% 
  feols(party_prop_votes ~ (p.ipn_sb + p.ipn_pzpr) | elect_year + woj_1975_real^party_camp_fe, cluster = ~party_camp_fe^woj_1975_real) 


m4_votes_raw <- model_data %>% 
  feols(party_prop_votes ~ (p.ipn_sb + p.ipn_pzpr) + I(party_num_cand/elect_num_cand) | elect_year + woj_1975_real^party_camp_fe, cluster = ~party_camp_fe^woj_1975_real) 


m5_votes_raw <- model_data %>% 
  feols(party_prop_votes ~ (p.ipn_sb + p.ipn_pzpr) + I(party_num_incumbent/party_num_cand) + I(party_num_cand/elect_num_cand) | elect_year + woj_1975_real^party_camp_fe,cluster = ~party_camp_fe^woj_1975_real) 


models_raw <- mget(paste0('m', 1:5, '_votes_raw'))
etable(models_raw, tex = tex_val, signif.code = sig_codes, dict = var_labs,
       digits = dig_num)













# where are insiders selected?

cands_covs %>% 
  group_by(teryt, elect_year) %>%
  slice(1) %>% 
  mutate(y = elect_num_ipn_list/elect_num_cand) %>% 
  ungroup %>% 
  mutate(insider_comp = (y * (n() - 1) + 0.5)/n()) %>% 
  feglm(insider_comp ~ comm_office:factor(elect_year) | elect_year + teryt, 
        family = quasibinomial('logit'),
        cluster = ~teryt) %>% 
  broom::tidy(., conf.int = T) %>% 
  mutate(year = as.numeric(substr(term, nchar(term) - 3, nchar(term)))) %>% 
  ggplot(aes(x = year, y = estimate))+
  geom_point()+
  geom_hline(yintercept = 0)+
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high))+
  theme_bw()+
  labs(title = 'Proportion of insider candidates in all parties predicted by SB presence')
ggsave('4_plots/sb_on_insidershare.pdf')



# LOO robustness check for CLR specification

#### NOTE: ChatGPT was prompted to write the loop code and progress bar to do a leave-one-out test on the coefficients for p.ipn_pzpr and p.ipn_sb, where each LOO should occur within an election year-municipality
# The author uses an adapted version of this code here. All errors are the author's own.

library(purrr)

set.seed(2025)

base_model <- feols(
  clr_vote ~ p.ipn_sb + p.ipn_pzpr | 
    elect_year + woj_1975_real^party_camp_fe,
  data = model_data,
  cluster = ~party_camp_fe^woj_1975_real
)

base_coefs <- coef(base_model)[c("p.ipn_sb", "p.ipn_pzpr")]

k <- 500


loo_iter <- function() {
  
  data_loo <- model_data %>%
    group_by(teryt, elect_year) %>%
    sample_n(size = n() - 1) %>%
    ungroup()
  
  model <- tryCatch(
    feols(
      clr_vote ~ p.ipn_sb + p.ipn_pzpr | 
        elect_year + woj_1975_real^party_camp_fe,
      data = data_loo,
      cluster = ~party_camp_fe^woj_1975_real
    ),
    error = function(e) NULL
  )
  
  if (!is.null(model)) {
    return(coef(model)[c("p.ipn_sb", "p.ipn_pzpr")])
  } else {
    return(c(p.ipn_sb = NA, p.ipn_pzpr = NA))
  }
}


pb <- txtProgressBar(min = 0, max = k)

# will run long loop to get 
# results <- map_dfr(1:k, function(i) {
  res <- loo_iter()
  setTxtProgressBar(pb, i)
  return(as.data.frame(as.list(res)))
})
close(pb)


results_clean <- na.omit(results)

results_long <- results_clean %>%
  pivot_longer(cols = everything(), names_to = "term", values_to = "estimate")


# results_long %>% 
#   write_csv('3_output/LOO_coeffs.csv')



loo_read <- read_csv('5_replication/LOO_coeffs.csv')
pval_df <- loo_read %>% mutate(real_coef = ifelse(term == 'p.ipn_sb', base_coefs['p.ipn_sb'], base_coefs['p.ipn_pzpr'])) %>% 
  group_by(term) %>% 
  summarise(pval = mean(abs(estimate - mean(estimate, na.rm = TRUE)) >= abs(real_coef - mean(estimate, na.rm = TRUE))))  # two-sided empirical p-value 

pvals <- pval_df$pval
names(pvals) <- pval_df$term

ggplot(loo_read, aes(x = estimate)) +
  geom_histogram(fill = 'steelblue', alpha = 0.8)+
  geom_density(fill = "lightblue", alpha = 0.5) +
  facet_wrap(~term, scales = "free", labeller = as_labeller(c(`p.ipn_pzpr` = 'PZPR',
                                                              `p.ipn_sb` = 'SB'))) +
  geom_vline(aes(xintercept = base_coefs[term], linetype = 'treu'), color = "red", linetype = "dashed") +
  labs(
    title = "Non-parametric LOO test of PZPR and SB coefficients",
    subtitle = paste0("Red line = true model estimate\n",
                      "Empirical p-values: SB = ", round(pvals["p.ipn_sb"], 3),
                      ", PZPR = ", round(pvals["p.ipn_pzpr"], 3)),
    x = "estimate", y = "frequency"
  ) +
  theme_bw()
ggsave('4_plots/loo_test.pdf')






# graveyard

ipn_kruskal <- ipn_plot_data %>% 
  group_by(name, elect_year) %>% 
  filter(!(elect_year>=2002 & party_camp == 'postauthopp')) %>% 
  do(broom::tidy(kruskal.test(value ~ party_camp, .))) %>% 
  ungroup() %>%
  mutate(sig = case_when(
    p.value < sig_codes[1] ~ names(sig_codes)[1],
    p.value < sig_codes[2]  ~ names(sig_codes)[2],
    p.value < sig_codes[3]  ~ names(sig_codes)[3],
    p.value < sig_codes[4]  ~ names(sig_codes)[4],
    TRUE            ~ "n.s."
  )) 


ipn_plot_data %>% 
  filter(!name == 'mix') %>% 
  ggplot(aes(x = as.factor(elect_year), y = value, color = party_camp))+
  stat_summary_bin(size = 0.4, position = position_dodge(width = 0.2))+
  #stat_summary_bin(geom = 'errorbar', position = position_dodge(width = 0.2), width = 0.2, alpha = 0.8)+
  facet_wrap(~name, ncol = 3, scales = 'free_y',
             labeller = as_labeller(
               c(`mix` = '(A) PZPR share of insiders', 
                 `p.ipn_pzpr` = '(B) PZPR share of candidates', 
                 `p.ipn_sb` = '(C) SB share of candidates')))+
  scale_color_manual(
    values = c(
      #'goldenrod',
      'steelblue',
      'darkred',
      'darkgreen',
      'darkorchid4'
    ),
    labels = c(
      #'New Right',
      'Post-Authoritarian Opposition',
      'Successor Parties',
      'Traditionalist',
      'All other parties'
    )
  )+
  # geom_hline(yintercept = 0)+
  theme_bw()+
  theme(legend.position = 'bottom',
        legend.title.position = 'left',
  )+
  labs(color = 'Party Family',
       x = 'year',
       y = 'proportion')+
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
  scale_y_continuous(expand = c(0.1,0))+
  geom_text(
    data = ipn_kruskal %>% filter(!name == 'mix'),
    aes(x = as.factor(elect_year), y = 0, label = sig),
    inherit.aes = FALSE,
    size = 3,
    vjust = 2,
    hjust = 0,
    color = "black"
  )
ipn_prop

# ggsave('4_plots/ipn_share_dynamics.pdf')

ipn_plot_data %>% 
  filter(name == 'mix' ) %>% 
  ggplot(aes(x = as.factor(elect_year), y = value, color = party_camp))+
  stat_summary_bin(size = 0.4, position = position_dodge(width = 0.2))+
  #stat_summary_bin(geom = 'errorbar', position = position_dodge(width = 0.2), width = 0.2)+
  facet_wrap(~name, ncol = 3, scales = 'free_y',
             labeller = as_labeller(
               c(`mix` = '(A) PZPR share of insiders', 
                 `p.ipn_pzpr` = '(B) PZPR share of candidates', 
                 `p.ipn_sb` = '(C) SB share of candidates')))+
  scale_color_manual(
    values = c(
      #'goldenrod',
      'steelblue',
      'darkred',
      'darkgreen',
      'darkorchid4'
    ),
    labels = c(
      #'New Right',
      'Post-Authoritarian Opposition',
      'Successor Parties',
      'Traditionalist',
      'All other parties'
    )
  )+
  geom_hline(yintercept = 0.5, alpha = 0.5)+
  theme_bw()+
  theme(legend.position = 'bottom',
        legend.title.position = 'left',
  )+
  labs(color = 'Party Family',
       x = '',
       y = 'proportion')+
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
        legend.position = 'none')+
  scale_y_continuous(expand = c(0.2,0))+
  geom_text(
    data = ipn_kruskal %>% filter(name == 'mix'),
    aes(x = as.factor(elect_year), y = 0, label = sig),
    inherit.aes = FALSE,
    size = 3,
    vjust = 2,
    hjust = 0,
    color = "black"
  )
